{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Warning:\n",
      "No module named 'experiment_manager'\n",
      "This is not a required package! However, there might be a couple of functions which could be helpful there!\n",
      "Might get it at https://github.com/lucfra/ExperimentManager\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ordered-set package not found..\n"
     ]
    }
   ],
   "source": [
    "import tensorflow as tf\n",
    "import far_ho as far"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def test_problem():\n",
    "    w = tf.get_variable('w', initializer=[1., 2., -1., -5.])\n",
    "\n",
    "    e = far.get_hyperparameter('e', initializer=[3., 1.1, 1., 0.1])\n",
    "\n",
    "    io = e*w**2 + (w - [0., -1., 12., 18.])**2\n",
    "    oo = (w - [0.1, -1.1, 12.1, 18.1])**2\n",
    "    return w, e, io, oo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def run_test_reverseHG(inner_optimizer_learning_rate=0.1, iterations=10):\n",
    "    # initialize everything \n",
    "    tf.reset_default_graph()\n",
    "    try: tf.get_default_session().close()\n",
    "    except: pass\n",
    "    ss = tf.InteractiveSession()\n",
    "\n",
    "    # hypergradient with ReverseHG\n",
    "    w, e, io, oo = test_problem()\n",
    "\n",
    "    ho = far.HyperOptimizer()\n",
    "    ts = ho.minimize(oo, far.GradientDescentOptimizer(0.1), io, \n",
    "                     far.GradientDescentOptimizer(inner_optimizer_learning_rate))\n",
    "\n",
    "    tf.global_variables_initializer().run()\n",
    "    \n",
    "    ts(iterations)\n",
    "    \n",
    "    return w, e, io, oo, ss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[array([ 0.       ,  0.2829068, 36.599995 , 51.660515 ], dtype=float32)]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "w, e, io, oo, ss = run_test_reverseHG(iterations=100)\n",
    "ss.run(far.hypergradients())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def run_test_implicit_hg(inner_optimizer_learning_rate, iterations=10):\n",
    "    # initialize everything \n",
    "    tf.reset_default_graph()\n",
    "    try: tf.get_default_session().close()\n",
    "    except: pass\n",
    "    ss = tf.InteractiveSession()\n",
    "\n",
    "    # hypergradient with ReverseHG\n",
    "    w, e, io, oo = test_problem()\n",
    "\n",
    "    ho = far.HyperOptimizer(far.ImplicitHG())  # leave all default values!\n",
    "    ts = ho.minimize(oo, far.GradientDescentOptimizer(0.1), io, \n",
    "                     far.GradientDescentOptimizer(inner_optimizer_learning_rate),\n",
    "                    global_step=tf.Variable(0, trainable=False)) \n",
    "    # Note that the global step is necessary since in HOAG you need a decreasing sequence for the tolerance \n",
    "    # (even though here we make only one step of hyperparameter optimization....)\n",
    "    # Note 2: in HOAG paper the optimizer used for soliving the inner problem was BFGS.. which is a bit difficult \n",
    "    # to use with tensorflow.. so here I just use plain gradient descent\n",
    "    \n",
    "    tf.global_variables_initializer().run()\n",
    "    # NOTE 2: the condition on the quality of the solution of the inner problem here is not checked! \n",
    "    # in HOAG papar that was linked to the tolerance (please see the paper..)\n",
    "    # Informally speanking, if the inner objective is strongly convex, checking how far the current iterate\n",
    "    # is from the global minimizer is equivalent to checking that the norm of the gradient is small enough.\n",
    "    # it would be possible to implement this feature (which is quite important I suppose, especially when you do many \n",
    "    # ho steps) by implementing a proper `generator` that checks whether the \n",
    "    # condition is verified or not, and if not goes on.. something like this: \n",
    "    \n",
    "#     def check_cond():\n",
    "#         t = 0\n",
    "#         if ||_grad|| > ho.hypergradient.tol(global_step):\n",
    "#             yield t\n",
    "#             t + =1\n",
    "    \n",
    "    # and pass it to `ts` instead of `iterations`.\n",
    "\n",
    "    ts(iterations)\n",
    "    return w, e, io, oo, ss\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Optimization terminated with:\n",
      "  Message: Desired error not necessarily achieved due to precision loss.\n",
      "  Objective function value: 0.000001\n",
      "  Number of iterations: 31\n",
      "  Number of functions evaluations: 156\n",
      "inner variable: [ 1.0239992e-07 -4.6552294e-01  5.9576735e+00  1.4582811e+01]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[array([5.1199818e-09, 2.8129867e-01, 3.6593979e+01, 9.3255455e+01],\n",
       "       dtype=float32)]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "w, e, io, oo, ss  = run_test_implicit_hg(0.1, iterations=10) \n",
    "\n",
    "print('inner variable:', w.eval())  # reasonable value\n",
    "ss.run(far.hypergradients())  # reasonable as well. \n",
    "# by increasing the number of iterations from 10 to 100 and 1000 and so on you see that the hypergradient converges"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Optimization terminated with:\n",
      "  Message: Desired error not necessarily achieved due to precision loss.\n",
      "  Objective function value: 564951424.000000\n",
      "  Number of iterations: 0\n",
      "  Number of functions evaluations: 74\n",
      "inner variable: [ 2.8247526e+08  2.7879388e+05 -4.1333700e+05 -1.1591436e+02]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[array([-0., -0.,  0.,  0.], dtype=float32)]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "w, e, io, oo, ss  = run_test_implicit_hg(1, iterations=10) \n",
    "\n",
    "print('inner variable:', w.eval())  # not so reasonable value.. inner optimization is not converging...\n",
    "ss.run(far.hypergradients())  # all 0 \n",
    "# the problem is this: `Objective function value: 564951424.000000` \n",
    "\n",
    "#here the step size of inner optimizer is too high!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "IMPLICIT HYPERGRAD\n",
      "INFO:tensorflow:Optimization terminated with:\n",
      "  Message: Desired error not necessarily achieved due to precision loss.\n",
      "  Objective function value: 0.000000\n",
      "  Number of iterations: 32\n",
      "  Number of functions evaluations: 84\n",
      "INNER VAR: [ 0.         -0.47619042  5.9999995  16.363632  ] \n",
      " HYPERGRAD [array([ 0.       ,  0.2829068, 36.600002 , 51.660526 ], dtype=float32)]\n",
      "\n",
      "REVERSE HYPERGRAD\n",
      "INNER VAR: [ 0.         -0.47619042  5.9999995  16.363632  ] \n",
      " HYPERGRAD [array([ 0.       ,  0.2829068, 36.599995 , 51.660515 ], dtype=float32)]\n"
     ]
    }
   ],
   "source": [
    "# as a side-dish.. when increasing the number of iteration the 2 methods (in this case)\n",
    "# compute (almost) the same hypergrad!\n",
    "print('IMPLICIT HYPERGRAD')\n",
    "w, e, io, oo, ss  = run_test_implicit_hg(0.1, iterations=1000) \n",
    "\n",
    "\n",
    "print('INNER VAR:', w.eval(), '\\n HYPERGRAD', ss.run(far.hypergradients()))\n",
    "\n",
    "print('\\nREVERSE HYPERGRAD')\n",
    "w, e, io, oo, ss  = run_test_reverseHG(iterations=1000) \n",
    "\n",
    "\n",
    "print('INNER VAR:', w.eval(), '\\n HYPERGRAD', ss.run(far.hypergradients()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
